{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:48.704155Z",
     "start_time": "2021-10-15T07:28:48.438450Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:54.459714Z",
     "start_time": "2021-10-15T07:28:48.771441Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n",
      "Attempting to start a local H2O server...\n",
      "  Java Version: java version \"1.8.0_11\"; Java(TM) SE Runtime Environment (build 1.8.0_11-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.11-b03, mixed mode)\n",
      "  Starting server from /home/caihengxing/anaconda3/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar\n",
      "  Ice root: /tmp/tmp6jxjjaj_\n",
      "  JVM stdout: /tmp/tmp6jxjjaj_/h2o_caihengxing_started_from_python.out\n",
      "  JVM stderr: /tmp/tmp6jxjjaj_/h2o_caihengxing_started_from_python.err\n",
      "  Server is running at http://127.0.0.1:54321\n",
      "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
       "<td>02 secs</td></tr>\n",
       "<tr><td>H2O_cluster_timezone:</td>\n",
       "<td>Asia/Shanghai</td></tr>\n",
       "<tr><td>H2O_data_parsing_timezone:</td>\n",
       "<td>UTC</td></tr>\n",
       "<tr><td>H2O_cluster_version:</td>\n",
       "<td>3.32.1.5</td></tr>\n",
       "<tr><td>H2O_cluster_version_age:</td>\n",
       "<td>2 months and 10 days </td></tr>\n",
       "<tr><td>H2O_cluster_name:</td>\n",
       "<td>H2O_from_python_caihengxing_05yfb9</td></tr>\n",
       "<tr><td>H2O_cluster_total_nodes:</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O_cluster_free_memory:</td>\n",
       "<td>26.67 Gb</td></tr>\n",
       "<tr><td>H2O_cluster_total_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_allowed_cores:</td>\n",
       "<td>40</td></tr>\n",
       "<tr><td>H2O_cluster_status:</td>\n",
       "<td>accepting new members, healthy</td></tr>\n",
       "<tr><td>H2O_connection_url:</td>\n",
       "<td>http://127.0.0.1:54321</td></tr>\n",
       "<tr><td>H2O_connection_proxy:</td>\n",
       "<td>{\"http\": null, \"https\": null}</td></tr>\n",
       "<tr><td>H2O_internal_security:</td>\n",
       "<td>False</td></tr>\n",
       "<tr><td>H2O_API_Extensions:</td>\n",
       "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
       "<tr><td>Python_version:</td>\n",
       "<td>3.7.3 final</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ------------------------------------------------------------------\n",
       "H2O_cluster_uptime:         02 secs\n",
       "H2O_cluster_timezone:       Asia/Shanghai\n",
       "H2O_data_parsing_timezone:  UTC\n",
       "H2O_cluster_version:        3.32.1.5\n",
       "H2O_cluster_version_age:    2 months and 10 days\n",
       "H2O_cluster_name:           H2O_from_python_caihengxing_05yfb9\n",
       "H2O_cluster_total_nodes:    1\n",
       "H2O_cluster_free_memory:    26.67 Gb\n",
       "H2O_cluster_total_cores:    40\n",
       "H2O_cluster_allowed_cores:  40\n",
       "H2O_cluster_status:         accepting new members, healthy\n",
       "H2O_connection_url:         http://127.0.0.1:54321\n",
       "H2O_connection_proxy:       {\"http\": null, \"https\": null}\n",
       "H2O_internal_security:      False\n",
       "H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n",
       "Python_version:             3.7.3 final\n",
       "--------------------------  ------------------------------------------------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import h2o\n",
    "from h2o.automl import H2OAutoML\n",
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:54.489470Z",
     "start_time": "2021-10-15T07:28:54.485847Z"
    }
   },
   "outputs": [],
   "source": [
    "train_path = '../autox/data/grocery_sales/train.csv'\n",
    "test_path = '../autox/data/grocery_sales/test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:59.374977Z",
     "start_time": "2021-10-15T07:28:54.521326Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n",
      "Parse progress: |█████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "# Load data into H2O\n",
    "train = h2o.import_file(train_path)\n",
    "test  = h2o.import_file(test_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:59.522218Z",
     "start_time": "2021-10-15T07:28:59.447556Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th style=\"text-align: right;\">  id</th><th>date               </th><th style=\"text-align: right;\">  store_nbr</th><th>family      </th><th style=\"text-align: right;\">  sales</th><th style=\"text-align: right;\">  onpromotion</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td style=\"text-align: right;\">   0</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>AUTOMOTIVE  </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   1</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>BABY CARE   </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   2</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>BEAUTY      </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   3</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>BEVERAGES   </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   4</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>BOOKS       </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   5</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>BREAD/BAKERY</td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   6</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>CELEBRATION </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   7</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>CLEANING    </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   8</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>DAIRY       </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "<tr><td style=\"text-align: right;\">   9</td><td>2013-01-01 00:00:00</td><td style=\"text-align: right;\">          1</td><td>DELI        </td><td style=\"text-align: right;\">      0</td><td style=\"text-align: right;\">            0</td></tr>\n",
       "</tbody>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T07:28:59.572821Z",
     "start_time": "2021-10-15T07:28:59.570604Z"
    }
   },
   "outputs": [],
   "source": [
    "y = \"sales\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:06.880025Z",
     "start_time": "2021-10-15T07:28:59.618826Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AutoML progress: |████████████████████████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "aml2 = H2OAutoML(max_runtime_secs = 7200, seed = 1, project_name = \"kaggle_grocery_sales\")\n",
    "aml2.train(y = y, training_frame = train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:07.220208Z",
     "start_time": "2021-10-15T09:18:06.935521Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "stackedensemble prediction progress: |████████████████████████████████████| 100%\n"
     ]
    }
   ],
   "source": [
    "pred = aml2.predict(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:07.308795Z",
     "start_time": "2021-10-15T09:18:07.274769Z"
    }
   },
   "outputs": [],
   "source": [
    "sub = pd.read_csv(test_path)\n",
    "sub = sub[['id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:07.482935Z",
     "start_time": "2021-10-15T09:18:07.379341Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "pred = h2o.as_list(pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:07.600760Z",
     "start_time": "2021-10-15T09:18:07.561007Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sub[y] = pred['predict']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2021-10-15T09:18:07.787904Z",
     "start_time": "2021-10-15T09:18:07.681601Z"
    }
   },
   "outputs": [],
   "source": [
    "sub.to_csv(\"./h2o_sub_kaggle_grocery_sales.csv\", index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
